# -*- coding:utf-8 -*-
# --author： jingfeng 
# time: 2019/2/13


'http://kaijiang.zhcw.com/zhcw/html/ssq/list_1.html'
"""
抓取双色球  开奖时间   期数   奖号     

"""
from urllib import request
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
import re
import time

class SSQ(object):
    """Scrape lottery draw records from kaijiang.zhcw.com into ``info.txt``.

    Creating an instance opens ``info.txt`` in the current directory for
    writing (truncating any existing file) and writes the header line.
    Call :meth:`close` when done, or use the instance as a context manager
    (``with SSQ() as ssq: ...``), so the file handle is released.
    """

    # Listing-page URL template; ``%s`` is the 1-based page number.
    PAGE_URL = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list_%s.html'

    # Compiled once; a raw string avoids the invalid ``'\s'`` escape warning.
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self):
        """Pick a random User-Agent and open the output file with its header."""
        self.ua = UserAgent()
        self.headers = {'User-Agent': self.ua.random}
        self.file = open('info.txt', 'w', encoding='utf-8')
        # NOTE: header text (including its spelling) is kept byte-for-byte so
        # that anything already consuming info.txt keeps working.
        self.file.write('lottery_time, number, wining_number,'
                        'slae_moneys,first_prize,second_prize' + '\n')

    def __enter__(self):
        """Return the scraper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the output file; exceptions from the block propagate."""
        self.close()
        return False

    def close(self):
        """Close the output file. Safe to call more than once."""
        self.file.close()

    @staticmethod
    def _squash_whitespace(text):
        """Return *text* with every whitespace character removed."""
        return SSQ._WHITESPACE.sub('', text)

    def _fetch_soup(self, url):
        """Download *url* with the instance headers and parse it with html5lib.

        The HTTP response is closed as soon as its body has been read.
        """
        req = request.Request(url=url, headers=self.headers)
        with request.urlopen(req) as response:
            html = response.read().decode('utf-8')
        return BeautifulSoup(html, 'html5lib')

    def _parse_row(self, tr):
        """Extract the six output fields (as strings) from one ``<tr>``.

        Order matches the header line: draw date, issue number, winning
        numbers joined by ``-``, sales amount, first prize, second prize.
        Raises ``IndexError``/``AttributeError`` if the row lacks the
        expected cells.
        """
        tds = tr.find_all('td')  # look the cells up once per row
        lottery_time = tds[0].get_text()
        number = tds[1].get_text()
        # Each winning ball is rendered in its own <em> element.
        wining_number = '-'.join(em.get_text() for em in tds[2].find_all('em'))
        # NOTE(review): if the site formats sales with thousands separators,
        # those commas collide with the ',' field delimiter below - confirm.
        slae_moneys = tds[3].find('strong').get_text()
        # The first-prize cell's text is spread over several lines; collapse it.
        first_prize = self._squash_whitespace(tds[4].get_text())
        second_prize = tds[5].find('strong').get_text()
        return [lottery_time, number, wining_number,
                slae_moneys, first_prize, second_prize]

    def get_number_page(self):
        """Return the total number of listing pages.

        The value is the text of the ``<strong>`` inside ``<p class="pg">``
        on listing page 1, returned as a string.
        """
        soup = self._fetch_soup(self.PAGE_URL % 1)
        return soup.find('p', class_="pg").find('strong').get_text()

    def get_ssq_info(self):
        """Walk every listing page and append one line per draw to the file.

        Each row's fields are printed to stdout and written comma-separated
        to ``self.file``, which is flushed after every row. The file is left
        open; call :meth:`close` afterwards.
        """
        pages = int(self.get_number_page())
        for page in range(1, pages + 1):
            soup = self._fetch_soup(self.PAGE_URL % page)
            # Skip the first two rows and the last one - presumably table
            # headers and a pagination footer; verify against the page markup.
            for tr in soup.find_all('tr')[2:-1]:
                fields = self._parse_row(tr)
                for field in fields:
                    print(field)
                self.file.write(','.join(fields) + '\n')
                self.file.flush()
                time.sleep(1)
            # Pause between pages to go easy on the server.
            time.sleep(5)

if __name__ == '__main__':
    # Run the full scrape; always release the output file, even if a request
    # or a parse step raises part-way through.
    ssq = SSQ()
    try:
        ssq.get_ssq_info()
    finally:
        ssq.file.close()